# Importation des bibliothèques
library(ggplot2)
library(ggalt)
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library(ggExtra)
library(ggcorrplot)
library(plotly)
## 
## Attachement du package : 'plotly'
## L'objet suivant est masqué depuis 'package:ggplot2':
## 
##     last_plot
## L'objet suivant est masqué depuis 'package:stats':
## 
##     filter
## L'objet suivant est masqué depuis 'package:graphics':
## 
##     layout
library(quantmod)
## Le chargement a nécessité le package : xts
## Le chargement a nécessité le package : zoo
## 
## Attachement du package : 'zoo'
## Les objets suivants sont masqués depuis 'package:base':
## 
##     as.Date, as.Date.numeric
## Le chargement a nécessité le package : TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(ggthemes)
library(waffle)
library(highcharter)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::first()  masks xts::first()
## x dplyr::lag()    masks stats::lag()
## x dplyr::last()   masks xts::last()
library(ggfortify)
## Registered S3 method overwritten by 'ggfortify':
##   method        from 
##   fortify.table ggalt

Nuages de points

# Make the black-and-white theme the default for every plot that follows.
theme_set(new = theme_bw())

# Load the midwest dataset shipped with ggplot2 and preview its first rows.
data(list = "midwest", package = "ggplot2")

head(x = midwest)
## # A tibble: 6 x 28
##     PID county   state  area poptotal popdensity popwhite popblack popamerindian
##   <int> <chr>    <chr> <dbl>    <int>      <dbl>    <int>    <int>         <int>
## 1   561 ADAMS    IL    0.052    66090      1271.    63917     1702            98
## 2   562 ALEXAND~ IL    0.014    10626       759      7054     3496            19
## 3   563 BOND     IL    0.022    14991       681.    14477      429            35
## 4   564 BOONE    IL    0.017    30806      1812.    29344      127            46
## 5   565 BROWN    IL    0.018     5836       324.     5264      547            14
## 6   566 BUREAU   IL    0.05     35688       714.    35157       50            65
## # ... with 19 more variables: popasian <int>, popother <int>, percwhite <dbl>,
## #   percblack <dbl>, percamerindan <dbl>, percasian <dbl>, percother <dbl>,
## #   popadults <int>, perchsd <dbl>, percollege <dbl>, percprof <dbl>,
## #   poppovertyknown <int>, percpovertyknown <dbl>, percbelowpoverty <dbl>,
## #   percchildbelowpovert <dbl>, percadultpoverty <dbl>,
## #   percelderlypoverty <dbl>, inmetro <int>, category <chr>
# Basic scatter plot: area on the x axis, total population on the y axis.
gg <- ggplot(midwest, aes(area, poptotal)) +
  geom_point()

gg

# Same scatter plot, with point colour mapped to state and point size mapped
# to population density.
gg <- ggplot(midwest, aes(area, poptotal)) +
  geom_point(mapping = aes(colour = state, size = popdensity))

gg

# Scatter plot (colour = state, size = population density) with a linear
# regression line and its confidence band.
gg <- ggplot(data = midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = TRUE)

gg
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot limited to area in [0, 0.1] and population in [0, 500000],
# with a linear fit, titles, axis labels and a caption.
gg <- ggplot(data = midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = TRUE) +
  # xlim()/ylim() discard observations outside the limits, so they are left
  # out of both the points and the fit (hence the "Removed ... rows" warnings
  # emitted when the plot is drawn).
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    subtitle = "surface vs pop",
    y = "population",
    x = "surface",
    title = "Nuage de points de la population en fonction de la surface du comté",
    caption = "Source : midwest dataset"
  )

gg
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

# Convert the static ggplot object into an interactive plotly figure.
ggplotly(p = gg)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
# 

# Keep the rows whose total population lies in (350000, 500000] and whose
# area lies strictly between 0.01 and 0.1; these rows are passed to
# geom_encircle() in the next plot.
midwest_select <- dplyr::filter(
  midwest,
  poptotal > 350000,
  poptotal <= 500000,
  area > 0.01,
  area < 0.1
)
midwest_select
## # A tibble: 6 x 28
##     PID county  state  area poptotal popdensity popwhite popblack popamerindian
##   <int> <chr>   <chr> <dbl>    <int>      <dbl>    <int>    <int>         <int>
## 1   659 WILL    IL    0.05    357313      7146.   303420    38361           692
## 2   707 LAKE    IN    0.03    475594     15853.   334203   116688           865
## 3  1221 GENESEE MI    0.037   430459     11634.   336651    84257          3132
## 4  2056 LUCAS   OH    0.021   462361     22017.   380155    68456          1164
## 5  2084 STARK   OH    0.034   367585     10811.   339421    25052           950
## 6  2993 DANE    WI    0.073   367085      5029.   344617    10511          1201
## # ... with 19 more variables: popasian <int>, popother <int>, percwhite <dbl>,
## #   percblack <dbl>, percamerindan <dbl>, percasian <dbl>, percother <dbl>,
## #   popadults <int>, perchsd <dbl>, percollege <dbl>, percprof <dbl>,
## #   poppovertyknown <int>, percpovertyknown <dbl>, percbelowpoverty <dbl>,
## #   percchildbelowpovert <dbl>, percadultpoverty <dbl>,
## #   percelderlypoverty <dbl>, inmetro <int>, category <chr>
# Same limited scatter plot as above, with the rows of midwest_select
# outlined by a red encircling shape.
gg2 <- ggplot(data = midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = TRUE) +
  # Encircle a subset of the data: this layer draws from midwest_select
  # instead of the full dataset.
  geom_encircle(
    aes(x = area, y = poptotal),
    data = midwest_select,
    col = "red",
    expand = 0.08,
    size = 2
  ) +
  # xlim()/ylim() discard observations outside the limits (hence the
  # "Removed ... rows" warnings emitted when the plot is drawn).
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    subtitle = "surface vs pop",
    y = "population",
    x = "surface",
    title = "Nuage de points de la population en fonction de la surface du comté",
    caption = "Source : midwest dataset"
  )

gg2
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

Jitter plot : Nuage de points spécial

# Load the mpg dataset and preview its first rows. The package is named
# explicitly, as for midwest, so the dataset source does not depend on which
# packages happen to be attached.
data("mpg", package = "ggplot2")
head(mpg)
## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa~
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa~
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa~
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa~
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa~
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa~
# Base plot object reused below: cty on the x axis, hwy on the y axis.
g <- ggplot(mpg, aes(cty, hwy))

# Plain scatter plot with a linear fit; identical (cty, hwy) pairs are drawn
# on top of each other.
g +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    subtitle = "City vs Hwy",
    title = "Nuage de points qui se chevauchent",
    caption = "Source : mpg dataset"
  )
## `geom_smooth()` using formula 'y ~ x'

# The points no longer overlap: geom_jitter() adds random horizontal noise
# (width = 0.5) to each point. The result is then rendered interactively.
g <- g +
  geom_jitter(width = 0.5, size = 1) +
  geom_smooth(method = "lm") +
  labs(
    subtitle = "City vs Hwy",
    title = "Kilométrage autoroute en fonction de kilométrage ville",
    caption = "Source : mpg dataset"
  )
ggplotly(p = g)
## `geom_smooth()` using formula 'y ~ x'